Data
# Inspect the structure of `data`: dimensions, column names, column types
# and the first few values of each column.
str(data)
## spec_tbl_df [18,723 x 20] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ room_id : num [1:18723] 10176931 8935871 14011697 6137978 18630616 ...
## $ survey_id : num [1:18723] 1476 1476 1476 1476 1476 ...
## $ host_id : num [1:18723] 49180562 46718394 10346595 8685430 70191803 ...
## $ room_type : chr [1:18723] "Shared room" "Shared room" "Shared room" "Shared room" ...
## $ country : logi [1:18723] NA NA NA NA NA NA ...
## $ city : chr [1:18723] "Amsterdam" "Amsterdam" "Amsterdam" "Amsterdam" ...
## $ borough : logi [1:18723] NA NA NA NA NA NA ...
## $ neighborhood : chr [1:18723] "De Pijp / Rivierenbuurt" "Centrum West" "Watergraafsmeer" "Centrum West" ...
## $ reviews : num [1:18723] 7 45 1 7 1 184 67 2 2 0 ...
## $ overall_satisfaction: num [1:18723] 4.5 4.5 0 5 0 4.5 5 0 0 0 ...
## $ accommodates : num [1:18723] 2 4 3 4 2 2 16 2 2 12 ...
## $ bedrooms : num [1:18723] 1 1 1 1 1 1 1 1 1 1 ...
## $ bathrooms : logi [1:18723] NA NA NA NA NA NA ...
## $ price : num [1:18723] 156 126 132 121 93 102 462 414 222 301 ...
## $ minstay : logi [1:18723] NA NA NA NA NA NA ...
## $ name : chr [1:18723] "Red Light/ Canal view apartment (Shared)" "Sunny and Cozy Living room in quite neighbours" "Amsterdam" "Canal boat RIDE in Amsterdam" ...
## $ last_modified : POSIXct[1:18723], format: "2017-07-23 13:06:27" "2017-07-23 13:06:23" ...
## $ latitude : num [1:18723] 52.4 52.4 52.3 52.4 52.4 ...
## $ longitude : num [1:18723] 4.89 4.9 4.94 4.89 4.85 ...
## $ location : chr [1:18723] "0101000020E610000033FAD170CA8C13403BC5AA41982D4A40" "0101000020E6100000842A357BA095134042791F4773304A40" "0101000020E6100000A51133FB3CC613403543AA285E2B4A40" "0101000020E6100000DF180280638F134085EE92382B304A40" ...
## - attr(*, "spec")=
## .. cols(
## .. room_id = col_double(),
## .. survey_id = col_double(),
## .. host_id = col_double(),
## .. room_type = col_character(),
## .. country = col_logical(),
## .. city = col_character(),
## .. borough = col_logical(),
## .. neighborhood = col_character(),
## .. reviews = col_double(),
## .. overall_satisfaction = col_double(),
## .. accommodates = col_double(),
## .. bedrooms = col_double(),
## .. bathrooms = col_logical(),
## .. price = col_double(),
## .. minstay = col_logical(),
## .. name = col_character(),
## .. last_modified = col_datetime(format = ""),
## .. latitude = col_double(),
## .. longitude = col_double(),
## .. location = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Top earning hosts ----
# Add up `price` over all rows belonging to each host, then rank the hosts
# from the largest total to the smallest.
host_earning <- data %>%
  group_by(host_id) %>%
  summarise(total_earning = sum(price)) %>%
  arrange(desc(total_earning))

print(host_earning)
## # A tibble: 15,943 x 2
## host_id total_earning
## <dbl> <dbl>
## 1 48703385 29493
## 2 1464510 11397
## 3 8558897 11300
## 4 113977564 10098
## 5 517215 9143
## 6 107745142 8622
## 7 65859990 8581
## 8 46691672 7779
## 9 84453740 7412
## 10 22855069 6000
## # ... with 15,933 more rows
# Render the ten highest-ranked hosts as a table.
# `head()` is used instead of `[1:10, ]` so that a result with fewer than ten
# hosts yields only the rows that exist rather than indexing past the end.
knitr::kable(
  head(host_earning, 10),
  caption = "A knitr kable."
)
Table: A knitr kable.

| host_id   | total_earning |
|-----------|---------------|
| 48703385  | 29493         |
| 1464510   | 11397         |
| 8558897   | 11300         |
| 113977564 | 10098         |
| 517215    | 9143          |
| 107745142 | 8622          |
| 65859990  | 8581          |
| 46691672  | 7779          |
| 84453740  | 7412          |
| 22855069  | 6000          |
# Room types and their row counts within each neighbourhood ----
# The data is grouped by both columns, so `count()` tallies one row per
# (neighborhood, room_type) pair; the result keeps that grouping.
rooms <- data %>%
  group_by(neighborhood, room_type) %>%
  count()

print(rooms)
## # A tibble: 61 x 3
## # Groups: neighborhood, room_type [61]
## neighborhood room_type n
## <chr> <chr> <int>
## 1 Bijlmer Centrum Entire home/apt 44
## 2 Bijlmer Centrum Private room 54
## 3 Bijlmer Centrum Shared room 1
## 4 Bijlmer Oost Entire home/apt 45
## 5 Bijlmer Oost Private room 52
## 6 Bos en Lommer Entire home/apt 836
## 7 Bos en Lommer Private room 149
## 8 Bos en Lommer Shared room 3
## 9 Buitenveldert / Zuidas Entire home/apt 184
## 10 Buitenveldert / Zuidas Private room 64
## # ... with 51 more rows
# Room type vs. count, one panel per neighbourhood ----
# Bars are drawn from the pre-computed counts in `rooms$n`, each labelled with
# its value just above the bar. The y axis is fixed to 0-3000 in every panel.
# The x axis title, labels and ticks are hidden because the fill colour
# already encodes the room type.
ggplot(data = rooms, mapping = aes(x = room_type, y = n, fill = room_type)) +
  geom_col() +
  geom_text(mapping = aes(label = n), vjust = -0.3, size = 3) +
  facet_wrap(~neighborhood) +
  ylim(0, 3000) +
  labs(title = "Room Type Vs No. of Bookings (Grouped by Neighbourhood)") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Neighbourhoods ranked by number of rows ----
# Tally the rows of `data` per neighbourhood and order the result from the
# largest count to the smallest; the result stays grouped by neighbourhood.
neighborhood_bookings <- data %>%
  group_by(neighborhood) %>%
  count() %>%
  arrange(desc(n))

print(neighborhood_bookings)
## # A tibble: 23 x 2
## # Groups: neighborhood [23]
## neighborhood n
## <chr> <int>
## 1 De Baarsjes / Oud West 3289
## 2 De Pijp / Rivierenbuurt 2378
## 3 Centrum West 2225
## 4 Centrum Oost 1730
## 5 Westerpark 1430
## 6 Noord-West / Noord-Midden 1418
## 7 Oud Oost 1169
## 8 Bos en Lommer 988
## 9 Oostelijk Havengebied / Indische Buurt 921
## 10 Watergraafsmeer 517
## # ... with 13 more rows
# Horizontal bar chart of the per-neighbourhood counts.
# `reorder(neighborhood, n)` orders the y axis by count, and each bar carries
# its value as a label just past its end. The x axis is fixed to 0-3500.
ggplot(
  data = neighborhood_bookings,
  mapping = aes(x = n, y = reorder(neighborhood, n), fill = n)
) +
  geom_col() +
  geom_text(mapping = aes(label = n), hjust = -0.2, size = 3.2) +
  xlim(0, 3500) +
  theme_minimal()

library(data.table)
## Warning: package 'data.table' was built under R version 4.1.2
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(dplyr)
library(formattable)
## Warning: package 'formattable' was built under R version 4.1.2
library(tidyr)
# Colour variables used by the tile formatters below: each pair gives the
# two end colours passed to `color_tile()`.
customGreen0 <- "#DeF7E9"
customGreen <- "#71CA97"
customRed <- "#ff7f7f"
customRed0 <- "#ffdfdf"

# Mean price per room type.
a1 <- data %>%
  group_by(room_type) %>%
  summarise(price = mean(price))

# Number of rows per room type.
a2 <- data %>%
  group_by(room_type) %>%
  count(room_type)

# Combine both summaries into one table keyed on room type.
room_avg_price <- merge(a1, a2, by = "room_type")

# Formatted table: bold grey room type, green tiles for the counts and red
# tiles for the mean prices.
# `align` carries exactly one code per column (room_type, price, n); the
# previous five-element vector did not match this three-column table.
formattable(
  room_avg_price,
  align = c("l", "c", "c"),
  list(
    `room_type` = formatter(
      "span",
      style = ~ style(color = "grey", font.weight = "bold")
    ),
    `n` = color_tile(customGreen0, customGreen),
    `price` = color_tile(customRed0, customRed)
  )
)
| room_type       | price    | n     |
|-----------------|----------|-------|
| Entire home/apt | 181.5471 | 14978 |
| Private room    | 106.8705 | 3682  |
| Shared room     | 103.6825 | 63    |